#/******************************************************************************
#*
#* Freescale Semiconductor Inc.
#* (c) Copyright 2004-2006 Freescale Semiconductor, Inc.
#* ALL RIGHTS RESERVED.
#*
#*******************************************************************************
#*
#* $File Name:  bitrev_table_16bit.s$
#*
#* $Date:       Jun-21-2006$
#*
#* $Version:    1.0.1.0$
#*
#* Description: bit-reverse permutation using seed table
#*
#*
#*   void bitrev_table_16bit(unsigned int n, short *inout_buffer, unsigned short *seed_table);
#*
#*******************************************************************************/

    .text
    .globl bitrev_table_16bit
bitrev_table_16bit:
#/* Exported entry label. The first instruction that follows it in this file */
#/* is the stwu of the prologue in the code section further down.            */

#/* constants */
#/* Byte size of one seed table entry: seed pointers advance by this amount  */
#/* and the entries are read with halfword loads (lhz).                      */
ONE_ITEM_TABLE_BYTESIZE:    .set    2
#/* Byte size of one buffer item: the buffer pointer advances by this amount */
#/* and the load/store macros choose their access width from it.             */
ONE_ITEM_BYTESIZE:    .set          4
#/* Shift count used to turn n into a byte stride (slwi by this amount).     */
ONE_ITEM_BYTESIZE_EXP:    .set      2    #/* 2^ONE_ITEM_BYTESIZE_EXP = ONE_ITEM_BYTESIZE */
#/* Step of the inner loop counter, and the value a seed entry is compared   */
#/* with to decide whether the inner loop is skipped. Seed entries are added */
#/* unscaled to buffer byte offsets, so they are in the same units.          */
TABLE_MULT_FACTOR:    .set          ONE_ITEM_BYTESIZE

#/* macros */
#/* load a,b : read one buffer item into register a from the address given   */
#/* by operand b (displacement form, e.g. 0(r11)).                            */
#/* The instruction is picked at assembly time from ONE_ITEM_BYTESIZE:        */
#/*   4 selects lwz, 8 selects evldw, any other value selects lhz.            */
load:   .macro a,b
            .if     ONE_ITEM_BYTESIZE == 4
                lwz   a,b
            .elseif ONE_ITEM_BYTESIZE == 8
                evldw a,b
            .else
                lhz   a,b
            .endif
        .endm
#/* loadx a,b,c : read one buffer item into register a from the address       */
#/* formed by registers b and c (indexed form).                               */
#/* The instruction is picked at assembly time from ONE_ITEM_BYTESIZE:        */
#/*   4 selects lwzx, 8 selects evldwx, any other value selects lhzx.         */
loadx:  .macro a,b,c
            .if     ONE_ITEM_BYTESIZE == 4
                lwzx   a,b,c
            .elseif ONE_ITEM_BYTESIZE == 8
                evldwx a,b,c
            .else
                lhzx   a,b,c
            .endif
        .endm
#/* store a,b : write register a as one buffer item to the address given by   */
#/* operand b (displacement form, e.g. 0(r11)).                               */
#/* The instruction is picked at assembly time from ONE_ITEM_BYTESIZE:        */
#/*   4 selects stw, 8 selects evstdw, any other value selects sth.           */
store:  .macro a,b
            .if     ONE_ITEM_BYTESIZE == 4
                stw    a,b
            .elseif ONE_ITEM_BYTESIZE == 8
                evstdw a,b
            .else
                sth    a,b
            .endif
        .endm
#/* storex a,b,c : write register a as one buffer item to the address formed  */
#/* by registers b and c (indexed form).                                      */
#/* The instruction is picked at assembly time from ONE_ITEM_BYTESIZE:        */
#/*   4 selects stwx, 8 selects evstdwx, any other value selects sthx.        */
storex: .macro a,b,c
            .if     ONE_ITEM_BYTESIZE == 4
                stwx    a,b,c
            .elseif ONE_ITEM_BYTESIZE == 8
                evstdwx a,b,c
            .else
                sthx    a,b,c
            .endif
        .endm

#<#/* registers */
n:    .set     3   #/* number of groups, B^P=N (N is FFT length), n=sqrt(N) if P is even or n=sqrt(N/B) if P is odd */
y:    .set     4   #/* in/out buffer base address; base register of the indexed item accesses */
seed:    .set     5   #/* seed table base address */
inner_loop_ctr:    .set     6   #/* inner loop position: starts at TABLE_MULT_FACTOR, steps by TABLE_MULT_FACTOR, compared with seed_offset */
seed_offset_addr:    .set     7   #/* address of the seed entry of the current outer iteration (starts one entry past seed) */
seed_offset:    .set     8   #/* halfword read from seed_offset_addr */
seed_gN_addr:    .set     9   #/* address of the seed entry read by the inner loop (reset to one entry past seed per outer iteration) */
firstj:    .set     10   #/* n * seed_offset: byte offset from y of the outer loop swap partner */
i_addr:    .set     11   #/* address of the item handled by the outer loop (starts at y + ONE_ITEM_BYTESIZE) */
i_addr_in:    .set     12   #/* address of the item handled by the inner loop (starts at i_addr + i_addr_in_inc) */
i_addr_in_inc:    .set     28   #/* n << ONE_ITEM_BYTESIZE_EXP: byte stride added to i_addr_in */
j_addr_offset:    .set     29   #/* firstj + inner seed entry: byte offset from y of the inner loop swap partner */
tmp_j:    .set     30   #/* item read from the j side of a swap; also briefly holds the inner seed entry */
tmp_i:    .set     31   #/* item read from the i side of a swap; also briefly holds n-1 for mtctr */
#>#
#/* The instructions in this file are written with the literal rN register   */
#/* names; the list above records the role of each of those registers.       */

#/* code */
#/*
#* void bitrev_table_16bit(unsigned int n, short *inout_buffer, unsigned short *seed_table)
#*
#* In-place permutation of inout_buffer driven by seed_table.
#*
#* In:    r3 = n, r4 = inout_buffer, r5 = seed_table
#* Out:   nothing is returned; inout_buffer is reordered in place
#* Uses:  r6-r12, cr0 and ctr as scratch; r28-r31 are saved in a 32-byte
#*        stack frame and restored before returning
#*
#* Buffer items are ONE_ITEM_BYTESIZE bytes wide. Seed entries are halfwords
#* and are used unscaled as byte offsets into the buffer.
#*
#* Outer loop, i = 1 .. n-1:
#*     swap item[i] with the item at byte offset n x seed[i]
#*     if seed[i] differs from TABLE_MULT_FACTOR, with g starting at 1:
#*         do
#*             swap the item at (address of item[i]) + g x n x ONE_ITEM_BYTESIZE
#*             with the item at byte offset n x seed[i] + seed[g]
#*             g = g + 1
#*         while g x TABLE_MULT_FACTOR is below seed[i] (unsigned compare)
#*
#* n of 0 or 1 returns immediately without touching the buffer or the stack.
#* The outer loop is bottom-tested with bdnz, which decrements CTR before
#* testing it, so a CTR preload of n-1 equal to 0 (or 0xFFFFFFFF for n = 0)
#* would not stop after one pass.
#*/
            cmplwi    r3,1;                              #/* unsigned: n is 0 or 1, nothing to permute */
            ble       bitrev_table_16bit_exit;

            stwu      r1,-32(r1)                         #/* 32-byte frame for the four saved registers */
            stw       r31,28(r1)
            stw       r30,24(r1)
            stw       r29,20(r1)
            stw       r28,16(r1)

            addi      r7,r5,ONE_ITEM_TABLE_BYTESIZE;     #/* seed_offset_addr = address of seed[1] */
            lhz       r8,0(r7);                          #/* seed_offset = seed[1] */
            addi      r11,r4,ONE_ITEM_BYTESIZE;          #/* i_addr = address of item[1] */
            slwi      r28,r3,ONE_ITEM_BYTESIZE_EXP;      #/* i_addr_in_inc = n items expressed in bytes */
            subi      r31,r3,1;
            mtctr     r31;                               #/* outer loop runs n-1 times */
outer_loop:
            mullw     r10,r3,r8;                         #/* firstj = n x seed_offset (byte offset from y) */
            load      r31,0(r11);                        #/* tmp_i = item at i_addr */
            addi      r9,r5,ONE_ITEM_TABLE_BYTESIZE;     #/* seed_gN_addr restarts at seed[1] */
            loadx     r30,r4,r10;                        #/* tmp_j = item at y + firstj */
            li        r6,1*TABLE_MULT_FACTOR;            #/* inner_loop_ctr restarts at one step */
            storex    r31,r4,r10;                        #/* first half of the swap */
            cmplwi    r8,1*TABLE_MULT_FACTOR;            #/* seed_offset equal to one step: no inner loop */
            store     r30,0(r11);                        #/* second half of the swap */
            add       r12, r11, r28;                     #/* i_addr_in = i_addr + one stride */
            beq       skip_inner_loop;
inner_loop:
            lhz       r30, 0(r9);                        #/* next seed entry (tmp_j used as a temporary) */
            addi      r6, r6, 1*TABLE_MULT_FACTOR;
            addi      r9, r9, ONE_ITEM_TABLE_BYTESIZE;
            add       r29,r10,r30;                       #/* j_addr_offset = firstj + seed entry */
            loadx     r30,r4,r29;                        #/* tmp_j = item at y + j_addr_offset */
            load      r31, 0(r12);                       #/* tmp_i = item at i_addr_in */
            cmplw     r6,r8;                             #/* unsigned: inner_loop_ctr against seed_offset */
            store     r30,0(r12);
            add       r12, r12, r28;                     #/* advance i_addr_in by one stride */
            storex    r31,r4,r29;
            blt       inner_loop;                        #/* compare-based exit; CTR is in use by the outer loop */
skip_inner_loop:
            addi      r7,r7,ONE_ITEM_TABLE_BYTESIZE;     #/* step to the next seed entry */
            lhz       r8,0(r7);                          #/* seed_offset for the next outer iteration */
            addi      r11,r11,ONE_ITEM_BYTESIZE;         #/* i_addr = next item */
            bdnz      outer_loop;

            lwz       r31,28(r1)
            lwz       r30,24(r1)
            lwz       r29,20(r1)
            lwz       r28,16(r1)
            addi      r1,r1,32
bitrev_table_16bit_exit:
            blr
